{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:34:02.280536Z",
     "start_time": "2024-06-06T01:33:56.000585Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Building prefix dict from the default dictionary ...\n",
      "Loading model from cache /tmp/jieba.cache\n",
      "Loading model cost 0.307 seconds.\n",
      "Prefix dict has been built successfully.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import mindspore\n",
    "from mindspore import ops\n",
    "from mindspore.amp import StaticLossScaler\n",
    "from mindnlp.dataset import load_dataset\n",
    "from mindnlp.core.serialization import safe_load_file\n",
    "from mindnlp.transformers.models.gpt_neox import (\n",
    "    GPTNeoXForCausalLM,\n",
    ")\n",
    "from mindnlp.engine import TrainingArguments, Trainer\n",
    "from mindnlp.transformers import AutoTokenizer\n",
    "from mindnlp.peft import (\n",
    "    get_peft_model,\n",
    "    LoraConfig,\n",
    "    TaskType,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:34:11.990480Z",
     "start_time": "2024-06-06T01:34:05.635489Z"
    }
   },
   "outputs": [],
   "source": [
    "# Replace the HuggingFace download link with hf-mirror.\n",
    "# os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"EleutherAI/pythia-410m-deduped\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:34:11.994950Z",
     "start_time": "2024-06-06T01:34:11.992267Z"
    }
   },
   "outputs": [],
   "source": [
    "max_length = 512\n",
    "lora_r = 8\n",
    "lora_alpha = 16\n",
    "lora_dropout = 0.1\n",
    "\n",
    "lr = 1e-4\n",
    "weight_decay = 0.01\n",
    "num_train_epochs = 1\n",
    "batch_size = 2\n",
    "\n",
    "# Log the progress every set steps\n",
    "logging_steps = 100\n",
    "# Save the model every set steps\n",
    "save_steps = 500\n",
    "save_total_limit = 4\n",
    "\n",
    "# Set scaling value for loss scale\n",
    "scale_value = 2**5\n",
    "# Set label smoothing\n",
    "label_smoothing_factor = 0.0\n",
    "\n",
    "output_dir = \"output\"\n",
    "# resume_from_checkpoint = os.path.join(output_dir, \"checkpoint-3500\")\n",
    "resume_from_checkpoint = None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = GPTNeoXForCausalLM.from_pretrained(\"EleutherAI/pythia-410m-deduped\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:53:58.926532Z",
     "start_time": "2024-06-05T15:53:45.049310Z"
    }
   },
   "outputs": [],
   "source": [
    "# Load Belle dataset\n",
    "# https://huggingface.co/datasets/BelleGroup/train_0.5M_CN/blob/main/Belle_open_source_0.5M.json\n",
    "ds = load_dataset(\"BelleGroup/train_0.5M_CN\")\n",
    "# ds = load_dataset(\"./data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:53:58.931040Z",
     "start_time": "2024-06-05T15:53:58.927772Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['instruction', 'input', 'output']\n",
      "<class 'str'> ('给定一个英文句子，翻译成中文。\\nI love to learn new things every day.\\n', '', '我每天喜欢学习新事物。')\n"
     ]
    }
   ],
   "source": [
    "# View dataset information.\n",
    "print(ds.column_names)\n",
    "print(type(ds.source[0][0]), ds.source[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:53:58.936377Z",
     "start_time": "2024-06-05T15:53:58.932558Z"
    }
   },
   "outputs": [],
   "source": [
    "# Splitting the training set and test set.\n",
    "train_dataset, eval_dataset = ds.split([0.9, 0.1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:53:58.940885Z",
     "start_time": "2024-06-05T15:53:58.937275Z"
    }
   },
   "outputs": [],
   "source": [
    "# Set Prompt\n",
    "BOS_TOKEN = tokenizer.bos_token\n",
    "EOS_TOKEN = tokenizer.eos_token\n",
    "\n",
    "\n",
    "def add_result_token(instruction, input, output):\n",
    "    \"\"\"\n",
    "    Build one training sample in the Human/Assistant prompt format.\n",
    "\n",
    "    Receives the values of the three mapped columns. Only `instruction`\n",
    "    and `output` are used; `input` is accepted but not read.\n",
    "\n",
    "    Returns the prompt followed by the answer and EOS_TOKEN, prefixed\n",
    "    with BOS_TOKEN when the tokenizer defines one.\n",
    "    \"\"\"\n",
    "    # Retrieve specific value through item.\n",
    "    instruction = instruction.item()\n",
    "    output = output.item()\n",
    "    prompt = \"Human: \" + instruction + \"\\n\\nAssistant: \"\n",
    "    # None is tested by identity rather than with `!=`.\n",
    "    if BOS_TOKEN is not None:\n",
    "        prompt = BOS_TOKEN + prompt\n",
    "    return prompt + output + EOS_TOKEN\n",
    "\n",
    "\n",
    "train_dataset = train_dataset.map(add_result_token, input_columns=['instruction', 'input', 'output'], output_columns=['inputs'])\n",
    "eval_dataset = eval_dataset.map(add_result_token, input_columns=['instruction', 'input', 'output'], output_columns=['inputs'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:00.011074Z",
     "start_time": "2024-06-05T15:53:58.942028Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'inputs': Tensor(shape=[], dtype=String, value= '<|endoftext|>Human: 给定一组数据，请确定其中是否存在异常值。\\n数据: 10, 20, 15, 25, 30, 500\\n\\nAssistant: 是，存在异常值500。<|endoftext|>')}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "next(train_dataset.create_dict_iterator())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:00.014530Z",
     "start_time": "2024-06-05T15:54:00.011869Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# set pad_token\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "tokenizer.padding_side = \"left\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:00.018962Z",
     "start_time": "2024-06-05T15:54:00.015637Z"
    }
   },
   "outputs": [],
   "source": [
    "# Label value that the loss skips (default ignore_index of cross entropy).\n",
    "IGNORE_INDEX = -100\n",
    "\n",
    "\n",
    "def tokenize_prompt(inputs):\n",
    "    \"\"\"\n",
    "    Convert the sentence into token ids using a tokenizer.\n",
    "\n",
    "    The text is truncated to max_length and not padded here.\n",
    "    Returns (input_ids, attention_mask, labels), where labels is a copy\n",
    "    of input_ids.\n",
    "    \"\"\"\n",
    "    result = tokenizer(\n",
    "        inputs.item(),\n",
    "        truncation=True,\n",
    "        max_length=max_length,\n",
    "        padding=False,\n",
    "    )\n",
    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
    "    return result[\"input_ids\"], result[\"attention_mask\"], result[\"labels\"]\n",
    "\n",
    "\n",
    "def dataset_batch(dataset, shuffle=False, buffer_size=16):\n",
    "    \"\"\"\n",
    "    Split the dataset into batches and use dynamic padding to ensure\n",
    "    that each batch has a consistent length.\n",
    "\n",
    "    When shuffle is true the samples are shuffled with the given\n",
    "    buffer_size before tokenization.\n",
    "    \"\"\"\n",
    "    if shuffle:\n",
    "        dataset = dataset.shuffle(buffer_size)\n",
    "    dataset = dataset.map(\n",
    "        tokenize_prompt,\n",
    "        [\"inputs\"],\n",
    "        [\"input_ids\", \"attention_mask\", \"labels\"]\n",
    "    )\n",
    "\n",
    "    dataset = dataset.padded_batch(\n",
    "        batch_size,\n",
    "        pad_info={\n",
    "            'input_ids': (None, tokenizer.pad_token_id),\n",
    "            'attention_mask': (None, 0),\n",
    "            # pad_token is the EOS token, so padding labels with its id\n",
    "            # would make the loss score the padding positions as well.\n",
    "            'labels': (None, IGNORE_INDEX)\n",
    "        }\n",
    "    )\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:00.022565Z",
     "start_time": "2024-06-05T15:54:00.019938Z"
    }
   },
   "outputs": [],
   "source": [
    "# train_dataset = dataset_batch(train_dataset, shuffle=True)\n",
    "train_dataset = dataset_batch(train_dataset)\n",
    "eval_dataset = dataset_batch(eval_dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:01.077189Z",
     "start_time": "2024-06-05T15:54:00.023654Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'input_ids': Tensor(shape=[2, 149], dtype=Int64, value=\n",
       " [[    0, 22705,    27 ... 14318,   111,     0],\n",
       "  [    0, 22705,    27 ...     0,     0,     0]]),\n",
       " 'attention_mask': Tensor(shape=[2, 149], dtype=Int64, value=\n",
       " [[1, 1, 1 ... 1, 1, 1],\n",
       "  [1, 1, 1 ... 0, 0, 0]]),\n",
       " 'labels': Tensor(shape=[2, 149], dtype=Int64, value=\n",
       " [[    0, 22705,    27 ... 14318,   111,     0],\n",
       "  [    0, 22705,    27 ...     0,     0,     0]])}"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# View the input token.\n",
    "next(train_dataset.create_dict_iterator())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:01.169014Z",
     "start_time": "2024-06-05T15:54:01.078070Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "trainable params: 786,432 || all params: 406,120,448 || trainable%: 0.1936450143973051\n"
     ]
    }
   ],
   "source": [
    "# creating peft model\n",
    "peft_config = LoraConfig(\n",
    "    task_type=TaskType.CAUSAL_LM,\n",
    "    inference_mode=False,\n",
    "    r=lora_r,\n",
    "    lora_alpha=lora_alpha,\n",
    "    lora_dropout=lora_dropout,\n",
    "    bias=\"none\",\n",
    "    )\n",
    "\n",
    "model = get_peft_model(model, peft_config)\n",
    "model.print_trainable_parameters()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:01.173001Z",
     "start_time": "2024-06-05T15:54:01.170604Z"
    }
   },
   "outputs": [],
   "source": [
    "# Set Training Param\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=output_dir,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    learning_rate=lr,\n",
    "    weight_decay=weight_decay,\n",
    "    logging_steps=logging_steps,\n",
    "    save_steps=save_steps,\n",
    "    save_total_limit=save_total_limit,\n",
    "    num_train_epochs=num_train_epochs,\n",
    "    label_smoothing_factor=label_smoothing_factor,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:01.179526Z",
     "start_time": "2024-06-05T15:54:01.174013Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class CausalLMTrainer(Trainer):\n",
    "    \"\"\"\n",
    "    Used for GPTNeoX CausalLM training.\n",
    "\n",
    "    Adds static loss scaling to the training step and computes the\n",
    "    shifted cross-entropy loss in float32.\n",
    "    \"\"\"\n",
    "    def __init__(\n",
    "        self,\n",
    "        model = None,\n",
    "        args = None,\n",
    "        map_fn = None,\n",
    "        train_dataset = None,\n",
    "        eval_dataset = None,\n",
    "        tokenizer = None,\n",
    "        model_init = None,\n",
    "        compute_metrics = None,\n",
    "        callbacks = None,\n",
    "        optimizers = (None, None),\n",
    "        preprocess_logits_for_metrics = None,\n",
    "    ):\n",
    "        \"\"\"\n",
    "        Modified from Trainer.\n",
    "\n",
    "        Creates the static loss scaler from the notebook-level scale_value\n",
    "        and forwards every argument unchanged to Trainer.\n",
    "        \"\"\"\n",
    "        self.loss_scaler = StaticLossScaler(scale_value=scale_value)\n",
    "\n",
    "        super().__init__(\n",
    "            model = model,\n",
    "            args = args,\n",
    "            map_fn = map_fn,\n",
    "            train_dataset = train_dataset,\n",
    "            eval_dataset = eval_dataset,\n",
    "            tokenizer = tokenizer,\n",
    "            model_init = model_init,\n",
    "            compute_metrics = compute_metrics,\n",
    "            callbacks = callbacks,\n",
    "            optimizers = optimizers,\n",
    "            preprocess_logits_for_metrics = preprocess_logits_for_metrics,\n",
    "        )\n",
    "\n",
    "    def training_step(self, model, inputs):\n",
    "        \"\"\"\n",
    "        Modified from Trainer.\n",
    "\n",
    "        Perform a training step on a batch of inputs.\n",
    "\n",
    "        Returns the unscaled loss divided by gradient_accumulation_steps\n",
    "        together with the unscaled gradients.\n",
    "        \"\"\"\n",
    "        model.set_train()\n",
    "        inputs = self._prepare_inputs(inputs)\n",
    "\n",
    "        def forward(inputs):\n",
    "            loss = self.compute_loss(model, inputs)\n",
    "            # Loss scale\n",
    "            loss = self.loss_scaler.scale(loss)\n",
    "            return loss\n",
    "\n",
    "        # Build the gradient function on first use and again after a model reload.\n",
    "        if getattr(self, 'grad_fn', None) is None or self.model_reload:\n",
    "            self.grad_fn = mindspore.value_and_grad(forward, None, self.optimizer.parameters)\n",
    "\n",
    "        loss, grads = self.grad_fn(inputs)\n",
    "        # Undo the scaling applied in forward() on the loss and the gradients.\n",
    "        loss = self.loss_scaler.unscale(loss)\n",
    "        grads = self.loss_scaler.unscale(grads)\n",
    "\n",
    "        return loss / self.args.gradient_accumulation_steps, grads\n",
    "\n",
    "    def compute_loss(self, model, inputs, return_outputs=False):\n",
    "        \"\"\"\n",
    "        Modified from Trainer.\n",
    "\n",
    "        Compute the causal language-modeling loss for one batch.\n",
    "\n",
    "        The labels are not passed to the model; the loss is computed here\n",
    "        from the returned logits, and `inputs` is left unmodified. With\n",
    "        label smoothing enabled the loss comes from self.label_smoother.\n",
    "        Otherwise it is the cross entropy between the logits at position i\n",
    "        and the label at position i + 1, evaluated in float32 and cast to\n",
    "        float16.\n",
    "\n",
    "        Returns the loss, or (loss, outputs) when return_outputs is true.\n",
    "\n",
    "        Raises:\n",
    "            KeyError: if the batch has no `labels` entry.\n",
    "        \"\"\"\n",
    "        # Fail before the forward pass when there is nothing to score against.\n",
    "        if \"labels\" not in inputs:\n",
    "            raise KeyError(\"labels\")\n",
    "        labels = inputs[\"labels\"]\n",
    "        model_inputs = {name: value for name, value in inputs.items() if name != \"labels\"}\n",
    "        outputs = model(**model_inputs)\n",
    "        # Save past state if it exists\n",
    "        # NOTE(review): labels are withheld, so output positions may shift if\n",
    "        # the model normally puts a loss first; confirm before enabling past_index.\n",
    "        if self.args.past_index >= 0:\n",
    "            self._past = outputs[self.args.past_index]\n",
    "\n",
    "        if self.label_smoother is not None:\n",
    "            loss = self.label_smoother(outputs, labels, shift_labels=True)\n",
    "        else:\n",
    "            # Drop the last logit and the first label so position i predicts token i + 1.\n",
    "            shift_logits = outputs[\"logits\"][:, :-1, :]\n",
    "            shift_labels = labels[:, 1:]\n",
    "            loss = ops.cross_entropy(shift_logits.view(-1, shift_logits.shape[-1]).to(mindspore.float32), shift_labels.view(-1)).to(mindspore.float16)\n",
    "\n",
    "        return (loss, outputs) if return_outputs else loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-05T15:54:01.185564Z",
     "start_time": "2024-06-05T15:54:01.180382Z"
    }
   },
   "outputs": [],
   "source": [
    "trainer = CausalLMTrainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=train_dataset,\n",
    "    eval_dataset=eval_dataset,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2024-06-05T15:54:01.186598Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "39ae170310864a1fa0a58167a70edf93",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/233665 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 2.5609, 'learning_rate': 9.999999747378752e-05, 'epoch': 0.0}\n",
      "{'loss': 1.4842, 'learning_rate': 9.999999747378752e-05, 'epoch': 0.0}\n",
      "{'loss': 1.4411, 'learning_rate': 9.999999747378752e-05, 'epoch': 0.0}\n"
     ]
    }
   ],
   "source": [
    "trainer.train(resume_from_checkpoint=resume_from_checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate\n",
    "\n",
    "# import math\n",
    "# eval_results = trainer.evaluate()\n",
    "# print(f\"Perplexity: {math.exp(eval_results['eval_loss']):.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:34:17.110976Z",
     "start_time": "2024-06-06T01:34:17.108797Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Perform inference on the trained model\n",
    "# The adapter is only loaded and used for generation below, not trained,\n",
    "# so the config is created in inference mode.\n",
    "peft_config = LoraConfig(\n",
    "    task_type=TaskType.CAUSAL_LM,\n",
    "    inference_mode=True,\n",
    "    r=lora_r,\n",
    "    lora_alpha=lora_alpha,\n",
    "    lora_dropout=lora_dropout,\n",
    "    bias=\"none\",\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:38:38.381431Z",
     "start_time": "2024-06-06T01:38:38.378812Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Set Prompt\n",
    "input_text = \"Human: \" + \"你是谁？\\n\" + \"\\n\\nAssistant: \"\n",
    "input_text = tokenizer.bos_token + input_text if tokenizer.bos_token is not None else input_text\n",
    "\n",
    "inputs = tokenizer(input_text, return_tensors=\"ms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:38:59.454076Z",
     "start_time": "2024-06-06T01:38:58.336727Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model = GPTNeoXForCausalLM.from_pretrained(\"EleutherAI/pythia-410m-deduped\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:39:04.772518Z",
     "start_time": "2024-06-06T01:38:59.455272Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/flysky/code/mindnlp/llm/peft/lora/train_gpt_neox/../../../../mindnlp/transformers/generation/utils.py:1561: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation. Please use and modify the model generation configuration (see https://hf-mirror.com/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
      "  warnings.warn(\n",
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- RAW_MODEL ---\n",
      "<|endoftext|>Human: 你是谁？\n",
      "\n",
      "\n",
      "Assistant: 你是谁？\n",
      "\n",
      "Human: 你是谁？\n",
      "\n",
      "Assistant: 你是谁？\n",
      "\n",
      "Human: 你是谁？\n",
      "\n",
      "Assistant: 你是谁？\n",
      "\n",
      "Human\n",
      "---\n"
     ]
    }
   ],
   "source": [
    "# Baseline: generate with the pretrained weights before any adapter is attached.\n",
    "output_ids = model.generate(**inputs, max_new_tokens=50)\n",
    "output_str = tokenizer.batch_decode(output_ids)[0]\n",
    "print(\"--- RAW_MODEL ---\")\n",
    "print(output_str)\n",
    "print(\"---\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load peft model\n",
    "model = get_peft_model(model, peft_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:39:05.094230Z",
     "start_time": "2024-06-06T01:39:04.773390Z"
    }
   },
   "outputs": [],
   "source": [
    "# Load the trained weights\n",
    "state_dict = safe_load_file(os.path.join(output_dir, \"checkpoint-14500/model.safetensors\"))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "([], [])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# If the output is '([], [])', it means that all weights are loaded\n",
    "model.load_state_dict(state_dict, False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2024-06-06T01:39:53.915015Z",
     "start_time": "2024-06-06T01:39:32.712792Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- LORA_MODEL ---\n",
      "<|endoftext|>Human: 你是谁？\n",
      "\n",
      "\n",
      "Assistant: 我是一名AI语言模型，可以用于训练和测试。<|endoftext|>\n",
      "---\n"
     ]
    }
   ],
   "source": [
    "# Generate again for the same prompt, now with the LoRA weights loaded.\n",
    "output_ids = model.generate(**inputs, max_new_tokens=50)\n",
    "output_str = tokenizer.batch_decode(output_ids)[0]\n",
    "print(\"--- LORA_MODEL ---\")\n",
    "print(output_str)\n",
    "print(\"---\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mindnlp",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
